{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Install the third-party packages imported by the cells below.\r\n",
    "# `%pip` installs into the environment of the running kernel, whereas `!pip`\r\n",
    "# runs whichever pip executable happens to be first on PATH.\r\n",
    "# `beautifulsoup4` is the distribution that provides the `bs4` module.\r\n",
    "%pip install pymysql beautifulsoup4 requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import os\r\n",
    "import re\r\n",
    "import time\r\n",
    "\r\n",
    "import pymysql\r\n",
    "import requests\r\n",
    "from bs4 import BeautifulSoup\r\n",
    "\r\n",
    "# Connection settings, overridable through environment variables.\r\n",
    "# NOTE(review): the fallback values are the credentials that used to be\r\n",
    "# hard-coded in this cell; rotate them and remove the defaults once the\r\n",
    "# environment variables are configured.\r\n",
    "DB_HOST = os.environ.get('NOVEL_DB_HOST', '47.94.92.138')\r\n",
    "DB_USER = os.environ.get('NOVEL_DB_USER', 'cq')\r\n",
    "DB_PASSWORD = os.environ.get('NOVEL_DB_PASSWORD', '123456')\r\n",
    "DB_NAME = os.environ.get('NOVEL_DB_NAME', 'novel')\r\n",
    "\r\n",
    "WEBSITE = '飞卢'  # value stored in the `website` column for every crawled row\r\n",
    "REQUEST_TIMEOUT = 10  # seconds; keeps one unresponsive page from hanging the crawl\r\n",
    "\r\n",
    "# One %s placeholder per column; the values are escaped by PyMySQL.\r\n",
    "INSERT_NOVEL_SQL = (\r\n",
    "    'INSERT INTO novel(novel.`name`,website,type,tag,startTime,endTime,'\r\n",
    "    'yuepiao,dianji,dashang,xianhua,yuanBookId,zuozhe,zishu) '\r\n",
    "    'VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'\r\n",
    ")\r\n",
    "\r\n",
    "\r\n",
    "class mysql:\r\n",
    "    '''Small wrapper around a single PyMySQL connection.'''\r\n",
    "\r\n",
    "    db = None  # replaced by the live connection in __init__\r\n",
    "\r\n",
    "    def __init__(self, url, username, pwd, dbname):\r\n",
    "        '''Open a connection to database `dbname` on host `url`.'''\r\n",
    "        # Keyword arguments work on every PyMySQL release; 1.0 and later\r\n",
    "        # reject positional ones.\r\n",
    "        self.db = pymysql.connect(host=url, user=username, password=pwd, database=dbname)\r\n",
    "\r\n",
    "    def __del__(self):\r\n",
    "        # `db` is still None when connect() raised, so there is nothing to close.\r\n",
    "        if self.db is not None:\r\n",
    "            self.db.close()\r\n",
    "\r\n",
    "    def do(self, sql, args=None):\r\n",
    "        '''Execute one statement, commit, and return the fetched rows.\r\n",
    "\r\n",
    "        sql: statement text; use %s placeholders when `args` is given.\r\n",
    "        args: optional sequence or mapping of values that PyMySQL escapes\r\n",
    "            and substitutes into `sql`.\r\n",
    "        Returns cursor.fetchall(), or None when the statement failed (the\r\n",
    "        error is printed rather than raised).\r\n",
    "        '''\r\n",
    "        try:\r\n",
    "            with self.db.cursor() as cursor:\r\n",
    "                cursor.execute(sql, args)\r\n",
    "                self.db.commit()\r\n",
    "                time.sleep(0.5)  # fixed pause after every statement\r\n",
    "                return cursor.fetchall()\r\n",
    "        except Exception as exc:\r\n",
    "            # Best effort: report the actual error instance and carry on.\r\n",
    "            print(repr(exc))\r\n",
    "            return None\r\n",
    "\r\n",
    "\r\n",
    "def _first_text(soup, selector):\r\n",
    "    '''Return the text of the first element matching `selector`, or '' if none match.'''\r\n",
    "    found = soup.select(selector)\r\n",
    "    return found[0].get_text() if found else ''\r\n",
    "\r\n",
    "\r\n",
    "def fun(a, url, yuanBookId):\r\n",
    "    '''Scrape one novel page and insert its metadata into the `novel` table.\r\n",
    "\r\n",
    "    a: `mysql` wrapper used to run the INSERT.\r\n",
    "    url: address of the novel page.\r\n",
    "    yuanBookId: id of the book on the source site, stored with the row.\r\n",
    "\r\n",
    "    The page is skipped (nothing is inserted) when it cannot be downloaded.\r\n",
    "    '''\r\n",
    "    try:\r\n",
    "        html = requests.get(url, timeout=REQUEST_TIMEOUT)  # download the page\r\n",
    "        html.raise_for_status()\r\n",
    "    except requests.RequestException as exc:\r\n",
    "        # Skip this book instead of storing an all-empty row.\r\n",
    "        print('skip {}: {!r}'.format(url, exc))\r\n",
    "        return\r\n",
    "    soup = BeautifulSoup(html.text, 'html.parser')  # parse the markup\r\n",
    "\r\n",
    "    # Novel title\r\n",
    "    novel_name = _first_text(soup, '#novelName')\r\n",
    "\r\n",
    "    # Category\r\n",
    "    category = _first_text(soup, '.C-One > span:nth-of-type(1) > span:nth-of-type(1) > a:nth-of-type(3)')\r\n",
    "\r\n",
    "    # Tags: the lines of the tag box, minus the first and last one, joined with '|'\r\n",
    "    tag_nodes = soup.select('body > div.center > div.C-Two.bodyBorderShadow > div.Two-Right > div.T-R-Top > div.T-R-T-Box2 > div:nth-of-type(6)')\r\n",
    "    tags = ''\r\n",
    "    if tag_nodes:\r\n",
    "        tags = '|'.join(tag_nodes[0].get_text().split('\\n')[1:-1])\r\n",
    "\r\n",
    "    # Time the novel started being published\r\n",
    "    start_time = _first_text(soup, 'body > div.center > div.C-Two.bodyBorderShadow > div.Two-Right > div.T-R-Top > div.T-R-T-Box2 > div:nth-of-type(5) > span > span')\r\n",
    "\r\n",
    "    # Time of the most recent update\r\n",
    "    end_time = _first_text(soup, 'body > div.center > div.C-Two.bodyBorderShadow > div.Two-Left > div.T-L-One > div.T-L-O-Zuo > div.T-L-O-Z-Box1 > span > span')\r\n",
    "\r\n",
    "    # Monthly tickets\r\n",
    "    yuepiao = _first_text(soup, 'body > div.center > div.C-Three.mgTop20.bodyBorderShadow > div:nth-of-type(7) > div.C-Thr-B1-Box3.colorQianlan')\r\n",
    "\r\n",
    "    # Click count\r\n",
    "    dianji = _first_text(soup, 'body > div.center > div.C-Two.bodyBorderShadow > div.Two-Left > div.T-L-One > div.T-L-O-Zuo > div.T-L-O-Z-Box2.fs14 > span:nth-of-type(3) > span')\r\n",
    "\r\n",
    "    # Reader tips\r\n",
    "    dashang = _first_text(soup, 'body > div.center > div.C-Three.mgTop20.bodyBorderShadow > div:nth-of-type(5) > div.C-Thr-B1-Box3.colorQianlan')\r\n",
    "\r\n",
    "    # Flowers\r\n",
    "    xianhua = _first_text(soup, 'body > div.center > div.C-Three.mgTop20.bodyBorderShadow > div:nth-of-type(1) > div.C-Thr-B1-Box3.colorFenhong')\r\n",
    "\r\n",
    "    # Author\r\n",
    "    zuozhe = _first_text(soup, 'body > div.center > div.C-Two.bodyBorderShadow > div.Two-Left > div.T-L-One > div.T-L-O-Zuo > div.T-L-O-Z-Box1 > a')\r\n",
    "\r\n",
    "    # Word count: the first run of digits in the box, 0 when there is none\r\n",
    "    digits = re.search(r'\\d+', _first_text(soup, 'body > div.center > div.C-Two.bodyBorderShadow > div.Two-Right > div.T-R-Middle > div:nth-child(2)'))\r\n",
    "    zishu = int(digits.group()) if digits else 0\r\n",
    "\r\n",
    "    # Same order as the column list in INSERT_NOVEL_SQL.\r\n",
    "    row = (novel_name, WEBSITE, category, tags, start_time, end_time,\r\n",
    "           yuepiao, dianji, dashang, xianhua, yuanBookId, zuozhe, zishu)\r\n",
    "    print(row)\r\n",
    "\r\n",
    "    # Parameterized so that quotes in scraped text cannot break or alter the SQL.\r\n",
    "    a.do(INSERT_NOVEL_SQL, row)\r\n",
    "\r\n",
    "\r\n",
    "if __name__ == '__main__':\r\n",
    "    FIRST_BOOK_ID = 650001\r\n",
    "    BOOK_COUNT = 10\r\n",
    "\r\n",
    "    a = mysql(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME)  # connect to the database\r\n",
    "    # Book pages follow a sequential id pattern, so consecutive ids are turned\r\n",
    "    # into URLs to crawl a fixed number of novels.\r\n",
    "    for i in range(BOOK_COUNT):\r\n",
    "        yuanBookId = FIRST_BOOK_ID + i\r\n",
    "        url = 'https://b.faloo.com/f/{}.html'.format(yuanBookId)\r\n",
    "        fun(a, url, yuanBookId)\r\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import os\r\n",
    "import re\r\n",
    "import time\r\n",
    "\r\n",
    "import pandas as pd\r\n",
    "import pymysql\r\n",
    "import requests\r\n",
    "from bs4 import BeautifulSoup\r\n",
    "\r\n",
    "# Connection settings, overridable through environment variables.\r\n",
    "# NOTE(review): the fallback values are the credentials that used to be\r\n",
    "# hard-coded in this cell; rotate them and remove the defaults once the\r\n",
    "# environment variables are configured.\r\n",
    "DB_HOST = os.environ.get('NOVEL_DB_HOST', '47.94.92.138')\r\n",
    "DB_USER = os.environ.get('NOVEL_DB_USER', 'cq')\r\n",
    "DB_PASSWORD = os.environ.get('NOVEL_DB_PASSWORD', '123456')\r\n",
    "DB_NAME = os.environ.get('NOVEL_DB_NAME', 'novel')\r\n",
    "\r\n",
    "REQUEST_TIMEOUT = 10  # seconds; keeps one unresponsive page from hanging the run\r\n",
    "\r\n",
    "# Selector used to find the author link on a novel page.\r\n",
    "# NOTE(review): the long structural selector previously passed to the lookup\r\n",
    "# helper was never used; this attribute selector is the one that was queried.\r\n",
    "AUTHOR_SELECTOR = 'a[class=\"fs14 colorQianHui\"]'\r\n",
    "\r\n",
    "\r\n",
    "# NOTE(review): this class is also defined in the crawl cell; consider moving\r\n",
    "# it to a shared module so the two copies cannot drift apart.\r\n",
    "class mysql:\r\n",
    "    '''Small wrapper around a single PyMySQL connection.'''\r\n",
    "\r\n",
    "    db = None  # replaced by the live connection in __init__\r\n",
    "\r\n",
    "    def __init__(self, url, username, pwd, dbname):\r\n",
    "        '''Open a connection to database `dbname` on host `url`.'''\r\n",
    "        # Keyword arguments work on every PyMySQL release; 1.0 and later\r\n",
    "        # reject positional ones.\r\n",
    "        self.db = pymysql.connect(host=url, user=username, password=pwd, database=dbname)\r\n",
    "\r\n",
    "    def __del__(self):\r\n",
    "        # `db` is still None when connect() raised, so there is nothing to close.\r\n",
    "        if self.db is not None:\r\n",
    "            self.db.close()\r\n",
    "\r\n",
    "    def do(self, sql, args=None):\r\n",
    "        '''Execute one statement, commit, and return the fetched rows.\r\n",
    "\r\n",
    "        sql: statement text; use %s placeholders when `args` is given.\r\n",
    "        args: optional sequence or mapping of values that PyMySQL escapes\r\n",
    "            and substitutes into `sql`.\r\n",
    "        Returns cursor.fetchall(), or None when the statement failed (the\r\n",
    "        error is printed rather than raised).\r\n",
    "        '''\r\n",
    "        try:\r\n",
    "            with self.db.cursor() as cursor:\r\n",
    "                cursor.execute(sql, args)\r\n",
    "                self.db.commit()\r\n",
    "                time.sleep(0.5)  # fixed pause after every statement\r\n",
    "                return cursor.fetchall()\r\n",
    "        except Exception as exc:\r\n",
    "            # Best effort: report the actual error instance and carry on.\r\n",
    "            print(repr(exc))\r\n",
    "            return None\r\n",
    "\r\n",
    "\r\n",
    "def fun(a, yuanBookId):\r\n",
    "    '''Re-scrape the author of one novel and store it in its `novel` row.\r\n",
    "\r\n",
    "    a: `mysql` wrapper used to run the UPDATE.\r\n",
    "    yuanBookId: id of the book on the source site; used to build the page\r\n",
    "        URL and to select the row to update.\r\n",
    "\r\n",
    "    Nothing is written when the page cannot be downloaded or no author is found.\r\n",
    "    '''\r\n",
    "    url = 'https://b.faloo.com/f/{}.html'.format(yuanBookId)\r\n",
    "    try:\r\n",
    "        html = requests.get(url, timeout=REQUEST_TIMEOUT)  # download the page\r\n",
    "        html.raise_for_status()\r\n",
    "    except requests.RequestException as exc:\r\n",
    "        print('skip {}: {!r}'.format(url, exc))\r\n",
    "        return\r\n",
    "    soup = BeautifulSoup(html.text, 'html.parser')  # parse the markup\r\n",
    "\r\n",
    "    # Author\r\n",
    "    matches = soup.select(AUTHOR_SELECTOR)\r\n",
    "    zuozhe = matches[0].text if matches else ''\r\n",
    "    print(zuozhe)\r\n",
    "    if not zuozhe:\r\n",
    "        # The rows being processed already hold an empty author, so writing\r\n",
    "        # '' again would change nothing.\r\n",
    "        return\r\n",
    "\r\n",
    "    # Parameterized so that quotes in the scraped name cannot break or alter the SQL.\r\n",
    "    a.do('UPDATE novel SET zuozhe=%s WHERE yuanBookId=%s', (zuozhe, yuanBookId))\r\n",
    "\r\n",
    "\r\n",
    "a = mysql(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME)\r\n",
    "\r\n",
    "# Rows from this site whose author is still empty.\r\n",
    "bookIds = pd.read_sql('SELECT yuanBookId FROM novel WHERE zuozhe=\"\" AND website=\"飞卢\" ', a.db)\r\n",
    "for book_id in bookIds.yuanBookId.dropna():\r\n",
    "    # int() rather than str(round(...)): a float id would otherwise render as\r\n",
    "    # '650001.0' and produce a wrong page URL.\r\n",
    "    book_id = int(book_id)\r\n",
    "    print(book_id)\r\n",
    "    fun(a, book_id)\r\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "PaddlePaddle 2.0.0b0 (Python 3.5)",
   "language": "python",
   "name": "py35-paddle1.2.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
